import random
import time

import requests
from lxml import etree



# HTTP request headers shared by the requests made in this script.
# NOTE(review): the Cookie value looks like a captured browser session and
# presumably expires -- confirm and refresh it if requests start failing.
headers = {
    # Mobile Safari / Edge user-agent string.
    "User-Agent": (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) "
        "Version/16.6 Mobile/15E148 Safari/604.1 Edg/131.0.0.0"
    ),
    # One cookie pair per line; the pieces concatenate into a single value.
    "Cookie": (
        'viewed="27104959"; '
        "bid=NmOAnY-VFEI; "
        "_vwo_uuid_v2=DD541ECF8E6C9D1BAABB0B9E1A532F9D8|e6e248ef92443ee8c98beaa155914aad; "
        "ap_v=0,6.0; "
        "__utma=30149280.1601933346.1736256352.1736256352.1736771158.2; "
        "__utmb=30149280.0.10.1736771158; "
        "__utmc=30149280; "
        "__utmz=30149280.1736771158.2.2.utmcsr=localhost:63342|utmccn=(referral)|utmcmd=referral|utmcct=/"
    ),
    "Referer": "https://movie.douban.com/explore",
}


# Crawl the Douban "best reviews" listing page by page and, for every review
# card found on a page, request that review's full-text endpoint.
# content_list collects the decoded body of each full-review response, in
# the order the reviews were fetched.
content_list = []
for i in range(10):
    start = i * 25  # value of the listing's ?start= paging parameter
    url = f"https://movie.douban.com/review/best/?start={start}"
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, headers=headers, timeout=10)
    data = response.content.decode()
    tree = etree.HTML(data)
    # etree.HTML() may hand back None when the body contains no parsable
    # markup; treat that as a page without review cards.
    if tree is None:
        review_cards = []
    else:
        review_cards = tree.xpath('//div[@class="review-list chart "]/div')
    print(len(review_cards))
    for card in review_cards:
        # xpath() yields Element objects, so the review id has to be read
        # from an attribute rather than interpolated from the element itself.
        # NOTE(review): assumes each card carries the id in `data-cid`, with
        # the nested review div's `id` as a fallback -- confirm against the
        # live page markup.
        card_id = card.get("data-cid") or next(iter(card.xpath("./div/@id")), None)
        if not card_id:
            # No id means no URL can be built; skip without hitting the network.
            continue
        print(card_id)
        url_content = f"https://movie.douban.com/j/review/{card_id}/full"
        # Request the per-review URL (not the listing URL again).
        content_response = requests.get(url_content, headers=headers, timeout=10)
        content_data = content_response.content.decode()
        content_list.append(content_data)
        print(content_response)
        # Random 1-4 s pause between requests to avoid hammering the site.
        time.sleep(random.randint(1, 4))
    time.sleep(random.randint(1, 4))
    print("抓取中")


# Flatten the array


# Remove garbled characters (the disabled code below strips all whitespace from each title)
# for i in range(len(title_list)):
#     title_list[i] = re.sub(r'\s+', '', title_list[i])
# print(title_list)